From ed3055affba1c1f98d6dca78d64ae6dd2bd571d7 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Tue, 5 Apr 2005 12:07:37 +0000 Subject: [PATCH] bitkeeper revision 1.1236.58.1 (42527f89zgr9JJ0KbdZkIWCfo_KBfA) SMP timer and irq fixes for 2.6. Merge x86/64 time.c with i386. Signed-off-by: Keir Fraser --- .rootkeys | 1 - .../arch/xen/i386/kernel/smpboot.c | 65 +- .../arch/xen/i386/kernel/time.c | 126 ++- .../arch/xen/i386/mm/hypervisor.c | 4 +- .../arch/xen/x86_64/kernel/Makefile | 4 +- .../arch/xen/x86_64/kernel/time.c | 840 ------------------ 6 files changed, 91 insertions(+), 949 deletions(-) delete mode 100644 linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c diff --git a/.rootkeys b/.rootkeys index be1feb1894..1e38beca2b 100644 --- a/.rootkeys +++ b/.rootkeys @@ -282,7 +282,6 @@ 424efaa6L1lrzwCIadTNxogSvljFwg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c 424efaa61XzweJyW3v5Lb9egpe3rtw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c 424efaa778MkpdkAIq0An1FjQENN_Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c -424efaa7vzbNdhwhkQPhs1V7LrAH4Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c 424efaa7szEu90xkjpXk5TufZxxa4g linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c 424efaa6sJsuHdGIGxm0r-ugsss3OQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c 424efaa6xbX9LkKyaXvgbL9s_39Trw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c index 0f271d80ad..70e0dbfdbf 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c @@ -437,21 +437,23 @@ void __init smp_callin(void) int cpucount; -static irqreturn_t local_debug_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t ldebug_interrupt( + int irq, void *dev_id, struct pt_regs *regs) { - return IRQ_HANDLED; } -static struct irqaction 
local_irq_debug = { - local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug", - NULL, NULL -}; +static DEFINE_PER_CPU(int, ldebug_irq); +static char ldebug_name[NR_IRQS][15]; -void local_setup_debug(void) +void ldebug_setup(void) { - (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug); + int cpu = smp_processor_id(); + + per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG); + sprintf(ldebug_name[cpu], "ldebug%d", cpu); + BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt, + SA_INTERRUPT, ldebug_name[cpu], NULL)); } @@ -472,7 +474,7 @@ static int __init start_secondary(void *unused) while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); local_setup_timer(); - local_setup_debug(); /* XXX */ + ldebug_setup(); smp_intr_init(); local_irq_enable(); /* @@ -1329,36 +1331,27 @@ void __init smp_cpus_done(unsigned int max_cpus) } extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); - -static struct irqaction reschedule_irq = { - smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule", - NULL, NULL -}; - -extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *); - -static struct irqaction invalidate_irq = { - smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate", - NULL, NULL -}; - extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); -static struct irqaction call_function_irq = { - smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE, - "call_function", NULL, NULL -}; +static DEFINE_PER_CPU(int, resched_irq); +static DEFINE_PER_CPU(int, callfunc_irq); +static char resched_name[NR_IRQS][15]; +static char callfunc_name[NR_IRQS][15]; void __init smp_intr_init(void) { + int cpu = smp_processor_id(); - (void)setup_irq( - bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR), - &reschedule_irq); - (void)setup_irq( - bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR), - &invalidate_irq); - (void)setup_irq( - 
bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR), - &call_function_irq); + per_cpu(resched_irq, cpu) = + bind_ipi_on_cpu_to_irq(cpu, RESCHEDULE_VECTOR); + sprintf(resched_name[cpu], "resched%d", cpu); + BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt, + SA_INTERRUPT, resched_name[cpu], NULL)); + + per_cpu(callfunc_irq, cpu) = + bind_ipi_on_cpu_to_irq(cpu, CALL_FUNCTION_VECTOR); + sprintf(callfunc_name[cpu], "callfunc%d", cpu); + BUG_ON(request_irq(per_cpu(callfunc_irq, cpu), + smp_call_function_interrupt, + SA_INTERRUPT, callfunc_name[cpu], NULL)); } diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c index d0dec7f85f..689e587e16 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c @@ -77,6 +77,15 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); +#if defined(__x86_64__) +unsigned long vxtime_hz = PIT_TICK_RATE; +struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ +volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; +struct timespec __xtime __section_xtime; +struct timezone __sys_tz __section_sys_tz; +#endif + unsigned long cpu_khz; /* Detected as we calibrate the TSC */ extern unsigned long wall_jiffies; @@ -111,8 +120,8 @@ static long last_rtc_update, last_update_to_xen; static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ /* Keep track of last time we did processing/updating of jiffies and xtime. */ -u64 processed_system_time; /* System time (ns) at last processing. */ -DEFINE_PER_CPU(u64, processed_system_time); +static u64 processed_system_time; /* System time (ns) at last processing. 
*/ +static DEFINE_PER_CPU(u64, processed_system_time); #define NS_PER_TICK (1000000000ULL/HZ) @@ -379,37 +388,49 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { time_t wtm_sec, sec; - s64 delta, nsec; + s64 delta, delta_cpu, nsec; long sec_diff, wtm_nsec; + int cpu = smp_processor_id(); do { __get_time_values_from_xen(); - delta = (s64)(shadow_system_time + - ((s64)cur_timer->get_offset() * - (s64)NSEC_PER_USEC) - - processed_system_time); + delta = delta_cpu = (s64)shadow_system_time + + ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC); + delta -= processed_system_time; + delta_cpu -= per_cpu(processed_system_time, cpu); } while (!TIME_VALUES_UP_TO_DATE); - if (unlikely(delta < 0)) { - printk("Timer ISR: Time went backwards: %lld %lld %lld %lld\n", - delta, shadow_system_time, + if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) { + printk("Timer ISR/%d: Time went backwards: " + "delta=%lld cpu_delta=%lld shadow=%lld " + "off=%lld processed=%lld cpu_processed=%lld\n", + cpu, delta, delta_cpu, shadow_system_time, ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), - processed_system_time); + processed_system_time, + per_cpu(processed_system_time, cpu)); return; } - /* Process elapsed jiffies since last call. */ + /* System-wide jiffy work. */ while (delta >= NS_PER_TICK) { delta -= NS_PER_TICK; processed_system_time += NS_PER_TICK; do_timer(regs); + } + + /* Local CPU jiffy work. */ + while (delta_cpu >= NS_PER_TICK) { + delta_cpu -= NS_PER_TICK; + per_cpu(processed_system_time, cpu) += NS_PER_TICK; update_process_times(user_mode(regs)); - if (regs) - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING, regs); } + if (cpu != 0) + return; + /* * Take synchronised time from Xen once a minute if we're not * synchronised ourselves, and we haven't chosen to keep an independent @@ -617,10 +638,10 @@ void __init hpet_time_init(void) #endif /* Dynamically-mapped IRQ. 
*/ -static int TIMER_IRQ; +static DEFINE_PER_CPU(int, timer_irq); static struct irqaction irq_timer = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", + timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0", NULL, NULL }; @@ -642,14 +663,23 @@ void __init time_init(void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); processed_system_time = shadow_system_time; + per_cpu(processed_system_time, 0) = processed_system_time; if (timer_tsc_init.init(NULL) != 0) BUG(); printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - TIMER_IRQ = bind_virq_to_irq(VIRQ_TIMER); +#if defined(__x86_64__) + vxtime.mode = VXTIME_TSC; + vxtime.quot = (1000000L << 32) / vxtime_hz; + vxtime.tsc_quot = (1000L << 32) / cpu_khz; + vxtime.hz = vxtime_hz; + sync_core(); + rdtscll(vxtime.last_tsc); +#endif - (void)setup_irq(TIMER_IRQ, &irq_timer); + per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER); + (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer); } /* Convert jiffies to system time. Call with xtime_lock held for reading. */ @@ -719,6 +749,7 @@ void time_resume(void) /* Reset our own concept of passage of system time. */ processed_system_time = shadow_system_time; + per_cpu(processed_system_time, 0) = processed_system_time; /* Accept a warp in UTC (wall-clock) time. 
*/ last_seen_tv.tv_sec = 0; @@ -728,63 +759,20 @@ void time_resume(void) } #ifdef CONFIG_SMP - -static irqreturn_t local_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - s64 delta; - int cpu = smp_processor_id(); - - do { - __get_time_values_from_xen(); - - delta = (s64)(shadow_system_time + - ((s64)cur_timer->get_offset() * - (s64)NSEC_PER_USEC) - - per_cpu(processed_system_time, cpu)); - } - while (!TIME_VALUES_UP_TO_DATE); - - if (unlikely(delta < 0)) { - printk("Timer ISR/%d: Time went backwards: %lld %lld %lld %lld\n", - cpu, delta, shadow_system_time, - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), - processed_system_time); - return IRQ_HANDLED; - } - - /* Process elapsed jiffies since last call. */ - while (delta >= NS_PER_TICK) { - delta -= NS_PER_TICK; - per_cpu(processed_system_time, cpu) += NS_PER_TICK; - if (regs) - update_process_times(user_mode(regs)); -#if 0 - if (regs) - profile_tick(CPU_PROFILING, regs); -#endif - } - - return IRQ_HANDLED; -} - -static struct irqaction local_irq_timer = { - local_timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ltimer", - NULL, NULL -}; - +static char timer_name[NR_IRQS][15]; void local_setup_timer(void) { - int seq, time_irq; - int cpu = smp_processor_id(); + int seq, cpu = smp_processor_id(); do { - seq = read_seqbegin(&xtime_lock); - per_cpu(processed_system_time, cpu) = shadow_system_time; + seq = read_seqbegin(&xtime_lock); + per_cpu(processed_system_time, cpu) = shadow_system_time; } while (read_seqretry(&xtime_lock, seq)); - time_irq = bind_virq_to_irq(VIRQ_TIMER); - (void)setup_irq(time_irq, &local_irq_timer); + per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER); + sprintf(timer_name[cpu], "timer%d", cpu); + BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt, + SA_INTERRUPT, timer_name[cpu], NULL)); } #endif diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c index f09e2ab4f0..c7b79528ff 100644 --- 
a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -108,7 +108,7 @@ void xen_tlb_flush_mask(cpumask_t mask) { struct mmuext_op op; op.cmd = MMUEXT_TLB_FLUSH_MULTI; - op.cpuset = (unsigned long)mask.bits; + op.cpuset = mask.bits; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } @@ -124,7 +124,7 @@ void xen_invlpg_mask(cpumask_t mask, unsigned long ptr) { struct mmuext_op op; op.cmd = MMUEXT_INVLPG_MULTI; - op.cpuset = (unsigned long)mask.bits; + op.cpuset = mask.bits; op.linear_addr = ptr & PAGE_MASK; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile index ff5ae3603f..7759a1432c 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile @@ -8,11 +8,13 @@ CFLAGS += -Iarch/$(XENARCH)/kernel extra-y := head.o head64.o init_task.o obj-y := process.o signal.o entry.o traps.o \ - time.o ioport.o ldt.o setup.o \ + ioport.o ldt.o setup.o \ x8664_ksyms.o vsyscall.o \ setup64.o e820.o irq.o early_printk.o c-obj-y := semaphore.o i387.o sys_x86_64.o \ ptrace.o quirks.o syscall.o + +obj-y += ../../i386/kernel/time.o obj-y += ../../i386/kernel/timers/ s-obj-y := diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c deleted file mode 100644 index c12f55a3c6..0000000000 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/time.c +++ /dev/null @@ -1,840 +0,0 @@ -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc..
- * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update - * 1996-05-03 Ingo Molnar - * fixed time warps in do_[slow|fast]_gettimeoffset() - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-09-05 (Various) - * More robust do_fast_gettimeoffset() algorithm implemented - * (works with APM, Cyrix 6x86MX and Centaur C6), - * monotonic gettimeofday() with fast_get_timeoffset(), - * drift-proof precision TSC calibration on boot - * (C. Scott Ananian , Andrew D. - * Balsa , Philip Gladstone ; - * ported from 2.0.35 Jumbo-9 by Michael Krause ). - * 1998-12-16 Andrea Arcangeli - * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy - * because was not accounting lost_ticks. - * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli - * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mach_time.h" - -#include -#include - -#include - -#include - -#include "io_ports.h" - -extern spinlock_t i8259A_lock; -int pit_latch_buggy; /* extern */ - -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - -#if defined(__x86_64__) -unsigned long vxtime_hz = PIT_TICK_RATE; - -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ - -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; -struct timespec __xtime __section_xtime; -struct timezone __sys_tz __section_sys_tz; - -static inline void rdtscll_sync(unsigned long *tsc) -{ -#ifdef CONFIG_SMP - sync_core(); -#endif - rdtscll(*tsc); -} -#endif - -u32 cpu_khz; /* Detected as we calibrate the TSC */ - -extern unsigned long wall_jiffies; - -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -extern struct init_timer_opts timer_tsc_init; -extern struct timer_opts timer_tsc; -struct timer_opts *cur_timer = &timer_tsc; - -/* These are peridically updated in shared_info, and then copied here. */ -u32 shadow_tsc_stamp; -u64 shadow_system_time; -static u32 shadow_time_version; -static struct timeval shadow_tv; - -/* - * We use this to ensure that gettimeofday() is monotonically increasing. We - * only break this guarantee if the wall clock jumps backwards "a long way". - */ -static struct timeval last_seen_tv = {0,0}; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* Periodically propagate synchronised time base to the RTC and to Xen. */ -static long last_rtc_update, last_update_to_xen; -#endif - -/* Periodically take synchronised time base from Xen, if we need it. 
*/ -static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ - -/* Keep track of last time we did processing/updating of jiffies and xtime. */ -u64 processed_system_time; /* System time (ns) at last processing. */ -DEFINE_PER_CPU(u64, processed_system_time); - -#define NS_PER_TICK (1000000000ULL/HZ) - -#define HANDLE_USEC_UNDERFLOW(_tv) do { \ - while ((_tv).tv_usec < 0) { \ - (_tv).tv_usec += USEC_PER_SEC; \ - (_tv).tv_sec--; \ - } \ -} while (0) -#define HANDLE_USEC_OVERFLOW(_tv) do { \ - while ((_tv).tv_usec >= USEC_PER_SEC) { \ - (_tv).tv_usec -= USEC_PER_SEC; \ - (_tv).tv_sec++; \ - } \ -} while (0) -static inline void __normalize_time(time_t *sec, s64 *nsec) -{ - while (*nsec >= NSEC_PER_SEC) { - (*nsec) -= NSEC_PER_SEC; - (*sec)++; - } - while (*nsec < 0) { - (*nsec) += NSEC_PER_SEC; - (*sec)--; - } -} - -/* Does this guest OS track Xen time, or set its wall clock independently? */ -static int independent_wallclock = 0; -static int __init __independent_wallclock(char *str) -{ - independent_wallclock = 1; - return 1; -} -__setup("independent_wallclock", __independent_wallclock); -#define INDEPENDENT_WALLCLOCK() \ - (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN)) - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called with the xtime_lock held for writing. 
- */ -static void __get_time_values_from_xen(void) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - do { - shadow_time_version = s->time_version2; - rmb(); - shadow_tv.tv_sec = s->wc_sec; - shadow_tv.tv_usec = s->wc_usec; - shadow_tsc_stamp = (u32)s->tsc_timestamp; - shadow_system_time = s->system_time; - rmb(); - } - while (shadow_time_version != s->time_version1); - - cur_timer->mark_offset(); -} - -#define TIME_VALUES_UP_TO_DATE \ - ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); }) - -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; - unsigned long flags; - s64 nsec; - - do { - unsigned long lost; - - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / NSEC_PER_USEC); - - nsec = shadow_system_time - processed_system_time; - __normalize_time(&sec, &nsec); - usec += (long)nsec / NSEC_PER_USEC; - - if (unlikely(!TIME_VALUES_UP_TO_DATE)) { - /* - * We may have blocked for a long time, - * rendering our calculations invalid - * (e.g. the time delta may have - * overflowed). Detect that and recalculate - * with fresh values. 
- */ - write_seqlock_irqsave(&xtime_lock, flags); - __get_time_values_from_xen(); - write_sequnlock_irqrestore(&xtime_lock, flags); - continue; - } - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= USEC_PER_SEC) { - usec -= USEC_PER_SEC; - sec++; - } - - /* Ensure that time-of-day is monotonically increasing. */ - if ((sec < last_seen_tv.tv_sec) || - ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) { - sec = last_seen_tv.tv_sec; - usec = last_seen_tv.tv_usec; - } else { - last_seen_tv.tv_sec = sec; - last_seen_tv.tv_usec = usec; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec; - s64 nsec; - struct timespec xentime; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - if (!INDEPENDENT_WALLCLOCK()) - return 0; /* Silent failure? */ - - write_seqlock_irq(&xtime_lock); - - /* - * Ensure we don't get blocked for a long time so that our time delta - * overflows. If that were to happen then our shadow time values would - * be stale, so we can retry with fresh ones. - */ - again: - nsec = (s64)tv->tv_nsec - - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC); - if (unlikely(!TIME_VALUES_UP_TO_DATE)) { - __get_time_values_from_xen(); - goto again; - } - - __normalize_time(&sec, &nsec); - set_normalized_timespec(&xentime, sec, nsec); - - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! 
- */ - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - nsec -= (shadow_system_time - processed_system_time); - - __normalize_time(&sec, &nsec); - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - - /* Reset all our running time counts. They make no sense now. */ - last_seen_tv.tv_sec = 0; - last_update_from_xen = 0; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (xen_start_info.flags & SIF_INITDOMAIN) { - dom0_op_t op; - last_rtc_update = last_update_to_xen = 0; - op.cmd = DOM0_SETTIME; - op.u.settime.secs = xentime.tv_sec; - op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC; - op.u.settime.system_time = shadow_system_time; - write_sequnlock_irq(&xtime_lock); - HYPERVISOR_dom0_op(&op); - } else -#endif - write_sequnlock_irq(&xtime_lock); - - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -static int set_rtc_mmss(unsigned long nowtime) -{ - int retval; - - /* gets recalled with irq locally disabled */ - spin_lock(&rtc_lock); - if (efi_enabled) - retval = efi_set_rtc_mmss(nowtime); - else - retval = mach_set_rtc_mmss(nowtime); - spin_unlock(&rtc_lock); - - return retval; -} -#endif - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. 
- */ -unsigned long long monotonic_clock(void) -{ - return cur_timer->monotonic_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) -{ - unsigned long pc = instruction_pointer(regs); - - if (in_lock_functions(pc)) - return *(unsigned long *)(regs->ebp + 4); - - return pc; -} -EXPORT_SYMBOL(profile_pc); -#endif - -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - time_t wtm_sec, sec; - s64 delta, nsec; - long sec_diff, wtm_nsec; - - do { - __get_time_values_from_xen(); - - delta = (s64)(shadow_system_time + - ((s64)cur_timer->get_offset() * - (s64)NSEC_PER_USEC) - - processed_system_time); - } - while (!TIME_VALUES_UP_TO_DATE); - - if (unlikely(delta < 0)) { - printk("Timer ISR: Time went backwards: %lld %lld %lld %lld\n", - delta, shadow_system_time, - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), - processed_system_time); - return; - } - - /* Process elapsed jiffies since last call. */ - while (delta >= NS_PER_TICK) { - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - do_timer(regs); - update_process_times(user_mode(regs)); - if (regs) - profile_tick(CPU_PROFILING, regs); - } - - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - if (!INDEPENDENT_WALLCLOCK() && - ((time_status & STA_UNSYNC) != 0) && - (xtime.tv_sec > (last_update_from_xen + 60))) { - /* Adjust shadow for jiffies that haven't updated xtime yet. */ - shadow_tv.tv_usec -= - (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); - HANDLE_USEC_UNDERFLOW(shadow_tv); - - /* - * Reset our running time counts if they are invalidated by - * a warp backwards of more than 500ms. 
- */ - sec_diff = xtime.tv_sec - shadow_tv.tv_sec; - if (unlikely(abs(sec_diff) > 1) || - unlikely(((sec_diff * USEC_PER_SEC) + - (xtime.tv_nsec / NSEC_PER_USEC) - - shadow_tv.tv_usec) > 500000)) { -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - last_rtc_update = last_update_to_xen = 0; -#endif - last_seen_tv.tv_sec = 0; - } - - /* Update our unsynchronised xtime appropriately. */ - sec = shadow_tv.tv_sec; - nsec = shadow_tv.tv_usec * NSEC_PER_USEC; - - __normalize_time(&sec, &nsec); - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - last_update_from_xen = sec; - } - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (!(xen_start_info.flags & SIF_INITDOMAIN)) - return; - - /* Send synchronised time to Xen approximately every minute. */ - if (((time_status & STA_UNSYNC) == 0) && - (xtime.tv_sec > (last_update_to_xen + 60))) { - dom0_op_t op; - struct timeval tv; - - tv.tv_sec = xtime.tv_sec; - tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; - tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ); - HANDLE_USEC_OVERFLOW(tv); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tv.tv_sec; - op.u.settime.usecs = tv.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - last_update_to_xen = xtime.tv_sec; - } - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. 
- */ - if ((time_status & STA_UNSYNC) == 0 && - xtime.tv_sec > last_rtc_update + 660 && - (xtime.tv_nsec / 1000) - >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - (xtime.tv_nsec / 1000) - <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { - /* horrible...FIXME */ - if (efi_enabled) { - if (efi_set_rtc_mmss(xtime.tv_sec) == 0) - last_rtc_update = xtime.tv_sec; - else - last_rtc_update = xtime.tv_sec - 600; - } else if (set_rtc_mmss(xtime.tv_sec) == 0) - last_rtc_update = xtime.tv_sec; - else - last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ - } -#endif -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. 
-arca - */ - write_seqlock(&xtime_lock); - do_timer_interrupt(irq, NULL, regs); - write_sequnlock(&xtime_lock); - return IRQ_HANDLED; -} - -/* not static: needed by APM */ -unsigned long get_cmos_time(void) -{ - unsigned long retval; - - spin_lock(&rtc_lock); - - if (efi_enabled) - retval = efi_get_time(); - else - retval = mach_get_cmos_time(); - - spin_unlock(&rtc_lock); - - return retval; -} - -static long clock_cmos_diff, sleep_start; - -static int timer_suspend(struct sys_device *dev, u32 state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; - return 0; -} - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - -#ifdef CONFIG_HPET_TIMER -extern void (*late_time_init)(void); -/* Duplicate of time_init() below, with hpet_enable part added */ -void __init hpet_time_init(void) -{ - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - 
set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - - if (hpet_enable() >= 0) { - printk("Using HPET for base-timer\n"); - } - - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - - time_init_hook(); -} -#endif - -/* Dynamically-mapped IRQ. */ -static int TIMER_IRQ; - -static struct irqaction irq_timer = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", - NULL, NULL -}; - -void __init time_init(void) -{ -#ifdef CONFIG_HPET_TIMER - if (is_hpet_capable()) { - /* - * HPET initialization needs to do memory-mapped io. So, let - * us do a late initialization after mem_init(). - */ - late_time_init = hpet_time_init; - return; - } -#endif - __get_time_values_from_xen(); - xtime.tv_sec = shadow_tv.tv_sec; - xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC; - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - processed_system_time = shadow_system_time; - - if (timer_tsc_init.init(NULL) != 0) - BUG(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - -#if defined(__x86_64__) - vxtime.mode = VXTIME_TSC; - vxtime.quot = (1000000L << 32) / vxtime_hz; - vxtime.tsc_quot = (1000L << 32) / cpu_khz; - vxtime.hz = vxtime_hz; - rdtscll_sync(&vxtime.last_tsc); -#endif - - TIMER_IRQ = bind_virq_to_irq(VIRQ_TIMER); - - (void)setup_irq(TIMER_IRQ, &irq_timer); -} - -/* Convert jiffies to system time. Call with xtime_lock held for reading. */ -static inline u64 __jiffies_to_st(unsigned long j) -{ - return processed_system_time + ((j - jiffies) * NS_PER_TICK); -} - -/* - * This function works out when the the next timer function has to be - * executed (by looking at the timer list) and sets the Xen one-shot - * domain timer to the appropriate value. This is typically called in - * cpu_idle() before the domain blocks. - * - * The function returns a non-0 value on error conditions. - * - * It must be called with interrupts disabled. 
- */ -int set_timeout_timer(void) -{ - u64 alarm = 0; - int ret = 0; -#ifdef CONFIG_SMP - unsigned long seq; -#endif - - /* - * This is safe against long blocking (since calculations are - * not based on TSC deltas). It is also safe against warped - * system time since suspend-resume is cooperative and we - * would first get locked out. - */ -#ifdef CONFIG_SMP - do { - seq = read_seqbegin(&xtime_lock); - if (smp_processor_id()) - alarm = __jiffies_to_st(jiffies + 1); - else - alarm = __jiffies_to_st(jiffies + 1); - } while (read_seqretry(&xtime_lock, seq)); -#else - alarm = __jiffies_to_st(next_timer_interrupt()); -#endif - - /* Failure is pretty bad, but we'd best soldier on. */ - if ( HYPERVISOR_set_timer_op(alarm) != 0 ) - ret = -1; - - return ret; -} - -void time_suspend(void) -{ - /* nothing */ -} - -/* No locking required. We are only CPU running, and interrupts are off. */ -void time_resume(void) -{ - if (timer_tsc_init.init(NULL) != 0) - BUG(); - - /* Get timebases for new environment. */ - __get_time_values_from_xen(); - - /* Reset our own concept of passage of system time. */ - processed_system_time = shadow_system_time; - - /* Accept a warp in UTC (wall-clock) time. */ - last_seen_tv.tv_sec = 0; - - /* Make sure we resync UTC time with Xen on next timer interrupt. 
*/ - last_update_from_xen = 0; -} - -#ifdef CONFIG_SMP -#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg) - -static irqreturn_t local_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - s64 delta; - int cpu = smp_processor_id(); - - do { - __get_time_values_from_xen(); - - delta = (s64)(shadow_system_time + - ((s64)cur_timer->get_offset() * - (s64)NSEC_PER_USEC) - - per_cpu(processed_system_time, cpu)); - } - while (!TIME_VALUES_UP_TO_DATE); - - if (unlikely(delta < 0)) { - printk("Timer ISR/%d: Time went backwards: %lld %lld %lld %lld\n", - cpu, delta, shadow_system_time, - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), - processed_system_time); - return IRQ_HANDLED; - } - - /* Process elapsed jiffies since last call. */ - while (delta >= NS_PER_TICK) { - delta -= NS_PER_TICK; - per_cpu(processed_system_time, cpu) += NS_PER_TICK; - if (regs) - update_process_times(user_mode(regs)); -#if 0 - if (regs) - profile_tick(CPU_PROFILING, regs); -#endif - } - - if (smp_processor_id() == 0) { - xxprint("bug bug\n"); - BUG(); - } - - return IRQ_HANDLED; -} - -static struct irqaction local_irq_timer = { - local_timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ltimer", - NULL, NULL -}; - -void local_setup_timer(void) -{ - int seq, time_irq; - int cpu = smp_processor_id(); - - do { - seq = read_seqbegin(&xtime_lock); - per_cpu(processed_system_time, cpu) = shadow_system_time; - } while (read_seqretry(&xtime_lock, seq)); - - time_irq = bind_virq_to_irq(VIRQ_TIMER); - (void)setup_irq(time_irq, &local_irq_timer); -} -#endif - -/* - * /proc/sys/xen: This really belongs in another file. It can stay here for - * now however. 
- */ -static ctl_table xen_subtable[] = { - {1, "independent_wallclock", &independent_wallclock, - sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, - {0} -}; -static ctl_table xen_table[] = { - {123, "xen", NULL, 0, 0555, xen_subtable}, - {0} -}; -static int __init xen_sysctl_init(void) -{ - (void)register_sysctl_table(xen_table, 0); - return 0; -} -__initcall(xen_sysctl_init); -- 2.30.2